############################################################
# Who's leaving home and why? (these are post-treatment outcomes, 
# though noting that info treatments had precisely null effects).
# 
# Author: 
# Soubhik Barari
# 
# Environment:
# - must use R 3.6
# 
# Input:
# - 00-read_clean_data.R
#
# Output:
# - fig/leavehome{.png,.pdf}
# - fig/leavehome_x_group{.png,.pdf}
# - fig/who_leavehome{.png,.pdf}
############################################################

source("00-read_clean_data.R")

## Do you need to leave?
table(responses_df$Q24) %>% prop.table()
#                    No        Yes 
# 0.03186559 0.35370800 0.61442642

## get individual choices
q25_df <- tidyr::separate_rows(responses_df, "Q25", sep = ",", convert = FALSE) %>%
    filter(Q25 != "")
q25_df$Q25 <- ifelse(
    q25_df$Q25 == "Exercising my freedom", "Exercise\nmy freedom",
    ifelse(
        q25_df$Q25 == "Getting bored", "Boredom",
        ifelse(
            q25_df$Q25 == "Getting some adrenaline (from breaking the law)", "Adrenaline\n(from\nbreaking law)",
            ifelse(
                q25_df$Q25 == "Going to the hospital / receiving medical treatments", "Hospital / \nmedical\ntreatment",
                ifelse(
                    q25_df$Q25 == "Getting tired of being inside of the house", "Tired of being\nin house",
                    ifelse(
                        q25_df$Q25 == "Going to the pharmacy", "Going to\nthe pharmacy",
                        ifelse(
                            q25_df$Q25 == "Taking care of dependents", "Care for\ndependents",
                            ifelse(
                                q25_df$Q25 == "Going to work", "Going to\nwork",
                                ifelse(
                                    q25_df$Q25 == "Other (specify):", "Other",
                                    ifelse(
                                        q25_df$Q25 == "Procuring food for yourself or family", "Getting food\nfor me/family",
                                        ifelse(
                                            q25_df$Q25 == "Meeting friends or relatives", "Meeting friends\nor relatives",
                                            ifelse(
                                                q25_df$Q25 == "Doing physical activity (e.g. exercising/jogging)", "Physical\nexercise",
                                                ifelse(
                                                    q25_df$Q25 == "Walking a pet", "Walking\na pet", NA
                                            )))))))))))))


essen_reasons <- c(
        "Getting food\nfor me/family", 
        "Going to\nthe pharmacy", 
        "Care for\ndependents",
        "Going to\nwork",
        "Hospital / \nmedical\ntreatment", 
        "Walking\na pet"
)

## how many reasons cited are essential reasons?
table(q25_df$Q25 %in% essen_reasons) %>% prop.table()
#     FALSE      TRUE 
# 0.1326554 0.8673446


#----------------------------------------------------------
# summarise overall reasons
#----------------------------------------------------------

viz_barplot <- function(qcol, qname, qfn, w=7, h=5, sort=TRUE) {
    response_df <<- responses_df
    q = unlist(lapply(response_df[[qcol]], function(s) strsplit(s, ",")))
    if (sort)
        qdf = data.frame(x=q) %>% group_by(x) %>% summarise(y=n()) %>% arrange(-y) 
    else 
        qdf = data.frame(x=q) %>% group_by(x) %>% summarise(y=n())
    
    qdf$x = factor(qdf$x, levels=rev(qdf$x))
    qdf$y = (qdf$y/sum(qdf$y)) * 100
    p <- qdf %>%
        ggplot(aes(x=x, y=y)) + 
        geom_bar(stat="identity") + 
        geom_text(aes(label=sprintf("%0.1f", y), x=x, y=y+2.6), size=3) +
        coord_flip() + 
        xlab(qname) + ylab(sprintf("percentage of responses (n=%i)", nrow(responses_df))) +
        theme_bw()
    p
    ggsave(paste0("fig/", qfn, ".pdf"), width=w, height=h)
    ggsave(paste0("fig/", qfn, ".png"), width=w, height=h)
}

viz_barplot(qcol = "Q25", qname = "reasons to leave home?", qfn="leavehome", w=7, h=3)

#----------------------------------------------------------
# summarise need to leave home
#----------------------------------------------------------

#TODO

#----------------------------------------------------------
# summarise mean responses by group
#----------------------------------------------------------

library(scales)
library(cowplot)
viz_df <- q25_df %>% 
    filter(gender != "Other") %>%
    filter(Q25 != "Other") %>%
    tidyr::gather("variable", "variable_value", c("gender", "health", "age_group")) %>%
    select(variable, variable_value, Q25) %>%
    mutate(variable = 
               ifelse(variable == "age_group", "age group",
                      ifelse(variable == "health", "general health",
                             ifelse(variable == "past_healthy_behavior", "past healthy\nbehavior", "gender"
                             )))) %>%
    mutate(variable_value = factor(variable_value, levels=c(
        "18-29", "30-39", "40-49", "50-59", "60+", "Poor", "Fair", "Good", "Excellent", "Male", "Female", "Other"
    ))) %>%
    mutate(Q25 = factor(Q25, levels=c( ##order by essentiality
        "Getting food\nfor me/family", 
        "Going to\nthe pharmacy", 
        "Going to\nwork",
        "Hospital / \nmedical\ntreatment", 
        "Care for\ndependents",
        "Walking\na pet", 
        "Lack of\nsocial activity", 
        "Physical\nexercise",
        "Tired of being\nin house",
        "Meeting friends\nor relatives",
        "Boredom",
        "Exercise\nmy freedom",
        "Adrenaline\n(from\nbreaking law)"
    ))) %>%
    group_by(variable, variable_value) %>%
    mutate(n_group = n()) %>%
    group_by(variable, variable_value, Q25) %>%
    summarise(
        y = (n()/head(n_group, 1))*100
    ) %>%
    filter(!is.na(variable_value), variable_value != "", !is.na(Q25)) %>%
    mutate(is_essential=ifelse(Q25 %in% essen_reasons, 1, 0))

viz_all <- viz_df %>% ggplot(aes(x=variable_value, y=y)) +
    geom_bar(stat="identity", position = position_dodge(width=0.9)) +
    coord_flip() +
    scale_y_continuous(limits=c(0, 55),oob = rescale_none) +
    facet_grid(variable ~ Q25, scales = "free_y", space = "free_y") +
    geom_text(aes(label=sprintf("%0.1f", y), x=variable_value, y=y+6.9), size=3) +
    # facet_grid(variable ~ behavior) +
    xlab("") + ylab("percentage of respondents in group that cite reason") +
    ggtitle("Essential reasons                                                                                                     Non-essential reasons") +
    # ggtitle("") +
    theme_bw() +
    theme(axis.text.x = element_text(angle=45, size=8), plot.title = element_text(hjust = 0.5))
viz_all
ggsave("fig/leavehome_x_group.pdf", height=4, width=12)
ggsave("fig/leavehome_x_group.png", height=4, width=12)

# vizl <- viz_df %>% filter(is_essential == 1) %>% ggplot(aes(x=variable_value, y=y)) +
#     geom_bar(stat="identity", position = position_dodge(width=0.9)) +
#     coord_flip() +
#     # scale_y_continuous(limits=c(50, 800),oob = rescale_none) +
#     facet_grid(variable ~ Q25, scales = "free_y", space = "free_y") +
#     # facet_grid(variable ~ behavior) +
#     xlab("") + ylab("percentage of respondents in group that cite reason") +
#     ggtitle("Essential reasons") +
#     theme_bw() +
#     theme(axis.text.x = element_text(angle=45, size=8), 
#         plot.title = element_text(hjust = 0.5),
#         strip.background = element_blank(),
#         strip.text.x = element_blank())
# 
# vizr <- viz_df %>% filter(is_essential != 1) %>% ggplot(aes(x=variable_value, y=y)) +
#     geom_bar(stat="identity", position = position_dodge(width=0.9)) +
#     coord_flip() +
#     # scale_y_continuous(limits=c(50, 800),oob = rescale_none) +
#     facet_grid(variable ~ Q25, scales = "free_y", space = "free_y") +
#     # facet_grid(variable ~ behavior) +
#     xlab("") + ylab("percentage of respondents in group that cite reason") +
#     ggtitle("Non-essential reasons") +
#     theme_bw() +
#     theme(axis.text.x = element_text(angle=45, size=8), plot.title = element_text(hjust = 0.5))
# 
# p <- cowplot::plot_grid(vizl, vizr, nrow =1)
# p

#----------------------------------------------------------
# summarise # of reasons by group
#----------------------------------------------------------

essen_reasons <- c(
        "Procuring food for yourself or family", 
        "Going to the pharmacy", 
        "Going to work",
        "Taking care of dependents", 
        "Going to the hospital / receiving medical treatments",
        "Walking a pet"
)

library(scales)
responses_df$`All\nreasons` <- sapply(responses_df$Q25, function(q) length(unlist(strsplit(q, split=","))))
responses_df$`Essential\nreasons` <- sapply(responses_df$Q25, function(q) {
    reasons <- unlist(strsplit(q, split=","))
    length(reasons[reasons %in% essen_reasons])
})
responses_df$`Non-essential\nreasons` <- sapply(responses_df$Q25, function(q) {
    reasons <- unlist(strsplit(q, split=","))
    length(reasons[!(reasons %in% essen_reasons)])
})

## how many ppl (overall) cite essential reasons?
table(responses_df$`Essential\nreasons` > 0) %>% prop.table()
#     FALSE      TRUE 
# 0.4107764 0.5892236 

## of those who cite any reasons, how many cite an essential reason?
table(responses_df[responses_df$`All\nreasons` > 0,"Essential\nreasons"] > 0) %>% prop.table()
#      FALSE       TRUE 
# 0.04101839 0.95898161 

## of those who say they need to leave, how many cite an essential reason?
table(responses_df[responses_df$Q24 == "Yes","Essential\nreasons"] > 0) %>% prop.table()
#      FALSE       TRUE 
# 0.04101839 0.95898161

## of those who say they need to leave, how many cite a non-essential reason?
table(responses_df[responses_df$Q24 == "Yes","Non-essential\nreasons"] > 0) %>% prop.table()
#     FALSE      TRUE 
# 0.8170674 0.1829326 


responses_df %>% 
    filter(gender != "Other") %>%
    tidyr::gather("variable", "variable_value", c("gender", "health", "age_group")) %>%
    tidyr::gather("type_reason", "number_in_type", c("All\nreasons", "Essential\nreasons", "Non-essential\nreasons")) %>%
    select(variable, variable_value, type_reason, number_in_type) %>%
    mutate(variable = 
               ifelse(variable == "age_group", "age group",
                      ifelse(variable == "health", "general health",
                             ifelse(variable == "past_healthy_behavior", "past healthy\nbehavior", "gender"
                             )))) %>%
    mutate(variable_value = factor(variable_value, levels=c(
        "18-29", "30-39", "40-49", "50-59", "60+", "Poor", "Fair", "Excellent", "Male", "Female", "Other"
    ))) %>%
    group_by(variable, variable_value, type_reason) %>%
    summarise(
        y = mean(number_in_type, na.rm=T),
        ymin = mean(number_in_type, na.rm=T) - (1.96*sd(number_in_type, na.rm=T)/sqrt(n())),
        ymax = mean(number_in_type, na.rm=T) + (1.96*sd(number_in_type, na.rm=T)/sqrt(n()))
    ) %>%
    filter(!is.na(variable_value), variable_value != "", !is.na(y)) %>%
    ggplot(aes(x=variable_value, y=y)) +
    geom_bar(stat="identity", position = position_dodge(width=0.9)) +
    geom_errorbar(aes(ymin=ymin, ymax=ymax), position = position_dodge(width=0.9), width = 0, size=1) +
    geom_text(aes(label=sprintf("%0.1f", y), x=variable_value, y=ymax+0.5), size=3) +
    coord_flip() +
    # scale_y_continuous(limits=c(0, 28),oob = rescale_none) +
    facet_grid(variable ~ type_reason, scales = "free_y", space = "free_y") +
    xlab("") + ylab("number of negative aspects cited") +
    theme_bw() +
    theme(axis.text.x = element_text(angle=45, size=8))


#----------------------------------------------------------
# model demographic predictors
#----------------------------------------------------------

library(mlogit)
require(nnet)

q25_df$Q25 <- factor(q25_df$Q25)
q25_df$anxiety_group <- factor(q25_df$anxiety_group, levels = c("low", "med", "hi"))
q25_df$think_effec <- ifelse(q25_df$perceivedeffectivnes %in% c("Effective", "Very effective"), 1, 0)
q25_df$think_publ_extreme <- ifelse(q25_df$Q23 %in% c("The reaction is much too extreme", "The reaction is somewhat too extreme"), 1, 0)
q25_df$think_govt_extreme <- ifelse(q25_df$perceivedreaction %in% c("The reaction is much too extreme", "The reaction is somewhat too extreme"), 1, 0)
q25_df$trust_govt <- ifelse(q25_df$Q36 %in% c("Strongly distrust", "Distrust"), 1, 0)
q25_df$factu_govt <- ifelse(q25_df$Q37 %in% c("Very untruthful", "Somewhat untruthful"), 1, 0)

## note: 
## - including public opinion variables dampen demographic variable effects
## - including anxiety dampens the behavioral group effects
## - behavior is clearly post-treatment to demographics, so removing that
m <- multinom(Q25 ~ age_group + health + gender, data = q25_df, family=binomial())

m_summary <- summary(m)
m_summary_coefs <- m_summary$coefficients
m_summary_se <- m_summary$standard.errors

## model comparison
m.sat <- multinom(Q25 ~ age_group + health + gender + past_healthy_behavior + age_group:health, data = q25_df, family=binomial())
m_summary.sat <- summary(m)
m_summary_coefs.sat <- m_summary$coefficients
m_summary_se.sat <- m_summary$standard.errors

##TODO: turn from log odds ratio into odds
stayhome_coef_df <- data.frame(m_summary_coefs)
stayhome_coef_df$reason <- rownames(stayhome_coef_df)
rownames(stayhome_coef_df) <- 1:nrow(stayhome_coef_df)

stayhome_df_se <- data.frame(m_summary_se)
stayhome_df_se$reason <- rownames(stayhome_df_se)
rownames(stayhome_df_se) <- 1:nrow(stayhome_df_se)

stayhome_df <- stayhome_coef_df %>% tidyr::gather("variable", "coef", -reason) %>%
    left_join(stayhome_df_se %>% tidyr::gather("variable", "se", -reason)) %>%
    mutate(ymin=coef-se, ymax=coef+se, y=coef)

stayhome_df$variable <- factor(stayhome_df$variable, levels=unique(stayhome_df$variable))

#----------------------------------------------------------
# visualize demographic predictors
#----------------------------------------------------------

stayhome_df %>%
    mutate(y=y, ymin=ymin, ymax=ymax) %>%
    filter(variable != "X.Intercept.") %>%
    filter(variable != "gender") %>%
    filter(variable != "genderOther") %>%
    filter(variable != "health") %>%
    filter(!is.na(reason)) %>%
    filter(reason != "Other") %>%
    mutate(sig=factor(ifelse(ymin < 0 & ymax > 0, 0, 1))) %>%
    ggplot(aes(x=variable, y=y, ymin=ymin, ymax=ymax)) +
    geom_point(aes(colour=sig)) +
    geom_pointrange(aes(colour=sig)) +
    facet_wrap(.~ reason, scales="free_x") +
    coord_flip() +
    scale_color_manual(values=c("grey","black")) +
    scale_x_discrete(labels = rev(c(
        "high anxiety",
        "medium anxiety",
        # "high healthy behavior in past week",
        # "medium healthy behavior in past week",
        "female",
        "has excellent overall health",
        "has good overall health",
        "has fair overall health",
        ">60 years old",
        "50-59 years old",
        "40-49 years old",
        "30-39 years old",
        "govt info is truthful",
        "trust govt",
        "govt reaction is extreme",
        "public reaction is extreme",
        "social distancing effective"
    ))) +
    geom_hline(yintercept=0, colour="red") +
    ylab("effect of group membership on\nciting reason for leaving house") +
    theme_bw() +
    theme(legend.position = "none")
ggsave("fig/who_leavehome.pdf", height=8, width=7)
ggsave("fig/who_leavehome.png", height=8, width=7)


## "essential"
good_reasons <- c(
    "Getting food\nfor me/family",
    "Going to\nwork",
    "Going to\nthe pharmacy",
    "Hospital / \nmedical treatment",
    "Walking\na pet"
)
stayhome_df %>%
    mutate(y=y, ymin=ymin, ymax=ymax) %>%
    mutate(sig=factor(ifelse(ymin < 0 & ymax > 0, 0, 1))) %>%
    filter(variable != "X.Intercept.") %>%
    filter(variable != "gender") %>%
    filter(variable != "genderOther") %>%
    filter(variable != "health") %>%
    ## removing uninformative variables >>>>>>>>>>>>>
    filter(!grepl("anxiety", variable), !grepl("govt|public|think", variable)) %>%
    ## <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    filter(!is.na(reason) & reason != "Other") %>%
    filter((reason %in% good_reasons)) %>%
    ggplot(aes(x=variable, y=y, ymin=ymin, ymax=ymax)) +
    geom_point(aes(colour=sig)) +
    geom_pointrange(aes(colour=sig)) +
    facet_wrap(.~ reason) +
    coord_flip() +
    scale_color_manual(values=c("grey","black")) +
    scale_x_discrete(labels = rev(c(
        # "high anxiety",
        # "medium anxiety",
        # "high healthy behavior",
        # "medium healthy behavior",
        # "other gender",
        "female",
        "excellent general health",
        "good overall health",
        "fair overall health",
        ">60 years old",
        "50-59 years old",
        "40-49 years old",
        "30-39 years old"
        # "govt info is truthful"
        # "trust govt",
        # "govt reaction is extreme",
        # "public reaction is extreme",
        # "social distancing effective"
    ))) +
    geom_hline(yintercept=0, colour="red") +
    ylab("effect of variable on citing\n reason for leaving house") +
    theme_bw() +
    theme(legend.position = "none")
ggsave("fig/who_leavehome_essential.pdf", height=7, width=7)
ggsave("fig/who_leavehome_essential.png", height=7, width=7)

## "nonessential"
stayhome_df %>%
    mutate(y=y, ymin=ymin, ymax=ymax) %>%
    mutate(sig=factor(ifelse(ymin < 0 & ymax > 0, 0, 1))) %>%
    filter(variable != "X.Intercept.") %>%
    filter(variable != "gender") %>%
    filter(variable != "genderOther") %>%
    filter(variable != "health") %>%
    ## removing uninformative variables >>>>>>>>>>>>>
    filter(!grepl("anxiety", variable), !grepl("govt|public|think", variable)) %>%
    ## <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    filter(!is.na(reason) & reason != "Other") %>%
    filter(!(reason %in% good_reasons)) %>%
    ggplot(aes(x=variable, y=y, ymin=ymin, ymax=ymax)) +
    geom_point(aes(colour=sig)) +
    geom_pointrange(aes(colour=sig)) +
    facet_wrap(.~ reason) +
    coord_flip() +
    scale_color_manual(values=c("grey","black")) +
    scale_x_discrete(labels = rev(c(
        # "high anxiety",
        # "medium anxiety",
        # "high healthy behavior",
        # "medium healthy behavior",
        # "other gender",
        "female",
        "excellent overall health",
        "good overall health",
        "fair overall health",
        ">60 years old",
        "50-59 years old",
        "40-49 years old",
        "30-39 years old"
        # "govt info is truthful"
        # "trust govt",
        # "govt reaction is extreme",
        # "public reaction is extreme",
        # "social distancing effective"
    ))) +
    geom_hline(yintercept=0, colour="red") +
    ylab("effect of variable on citing\n reason for leaving house") +
    theme_bw() +
    theme(legend.position = "none")
ggsave("fig/who_leavehome_nonessential.pdf", height=7, width=7)
ggsave("fig/who_leavehome_nonessential.png", height=7, width=7)
